library(tidyverse)
library(ggplot2)
Select the years to analyse!
energy_load <- read.csv("../data/load_15-24.csv")
energy_load$date <- as.POSIXct(energy_load$date, tz = "UTC")
Summary statistics
summary(energy_load)
## date load hour_int
## Min. :2015-01-01 00:00:00.00 Min. :30909 Min. : 0.0
## 1st Qu.:2017-04-28 06:30:00.00 1st Qu.:48110 1st Qu.: 6.0
## Median :2019-08-24 12:00:00.00 Median :56021 Median :12.0
## Mean :2019-08-24 11:34:26.74 Mean :56317 Mean :11.5
## 3rd Qu.:2021-12-19 16:30:00.00 3rd Qu.:64622 3rd Qu.:17.5
## Max. :2024-04-15 23:00:00.00 Max. :81078 Max. :23.0
## weekday_int month_int working_day
## Min. :1.000 Min. : 1.000 Mode :logical
## 1st Qu.:2.000 1st Qu.: 3.000 FALSE:23279
## Median :4.000 Median : 6.000 TRUE :58152
## Mean :4.001 Mean : 6.392
## 3rd Qu.:6.000 3rd Qu.: 9.000
## Max. :7.000 Max. :12.000
paste("SD for load: ", sapply(energy_load, sd, na.rm = TRUE)[3], " MWh")
## [1] "SD for load: 6.9221915040762 MWh"
Overview of the whole time series
energy_load |>
ggplot(aes(date, load)) +
geom_line() +
labs(
x = "Time",
y = "Load in MWh",
title = "Loads over the years 2015 - 2024",
)
The average load per hour for the whole time series
energy_load |>
group_by(hour_int) |>
summarise(mean_load = mean(load)) |>
ggplot(aes(x = hour_int, y = mean_load)) +
geom_point() +
geom_line() +
labs(
x = "Hour of the day",
y = "Mean Load",
title = "Average Load over the years 2015-2024 per day",
)
The average load per hour grouped by year
# Add a new column for the year
# Group by hour and year, then calculate the mean load
avg_load_by_hour_year <- energy_load |>
mutate(year = year(date)) |>
group_by(hour_int, year) |>
summarise(mean_load = mean(load))
## `summarise()` has grouped output by 'hour_int'. You can override using the
## `.groups` argument.
# Plot
avg_load_by_hour_year |>
ggplot(aes(x = hour_int, y = mean_load, colour = factor(year))) +
geom_point() +
geom_line() +
labs(
x = "Hour of the day",
y = "Mean Load",
title = "Average Load over the years 2015-2024 per day",
)
Boxplot of the load per day for the weeks from 2022 - 2024
energy_load |>
ggplot(aes(x = as.factor(weekday_int), y = load)) +
geom_boxplot() +
labs(x = "Weekday", y = "Load in MWh", title = "Load per weekday from 2015 - 2024")
Boxplot of the load per month for periode from 2022 - 2024
energy_load |>
ggplot(aes(x = as.factor(month_int), y = load)) +
geom_boxplot() +
labs(x = "Month", y = "Load in MWh", title = "Load per month from 2015 - 2024")
Load over the whole time period by months and grouped by working day
month <- 2
energy_load |>
filter(month_int == month) |>
ggplot(aes(hour_int, load, color = working_day)) +
geom_point() +
labs(
x = "Hour of the day",
y = "Load in MWh",
title = "Load per hour for the period 2015 - 2024",
subtitle = paste("grouped by working day and filtered by month ", month)
)
Overview of a time series of loads per Month for a given year and month
# --- Get the time series of loads for a given year and month ---
year <- 2024
month <- 1
# Create the figure
energy_load |>
filter(year(date) == year & month_int == month) |>
mutate(week_day_hour = weekday_int + hour_int / 24, week_int = week(date)) |>
ggplot(aes(x = week_day_hour, y = load, colour = factor(week_int))) +
geom_point(size = 0.5) +
geom_line() +
scale_x_continuous(breaks = 1:7, labels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")) +
labs(
x = "Day of the week",
y = "Load in MWh",
colour = "Week of the month",
title = paste("Load per hour for the year ", year, " and the month ", month)
)
A histogramm of the data for the whole time series
ggplot(energy_load, aes(load)) +
geom_histogram(binwidth = 500) +
labs(
x = "Load in MWh",
title = "Histogramm of all data points from 2015 - 2024",
)
A histogramm of the data grouped by working days and weekends
ggplot(energy_load, aes(load)) +
geom_histogram(binwidth = 500) +
labs(
title = "Histogramm of Loads for 2015 - 2024 for Working Days",
x = "Load in MWh"
) +
facet_wrap(~working_day)
Getting the peaks
peaks <- energy_load |>
group_by(as.Date(date)) |>
slice(which.max(load))
Overview of peaks over time
peaks |>
ggplot(aes(date, load)) +
geom_line() +
labs(
x = "Time",
y = "Load in MWh",
title = "Load peaks over the years 2015 - 2024",
)
Histogramm of the peaks
peaks |>
ggplot(aes(x = load)) +
geom_histogram(binwidth = 250) +
labs(
title = "Histogramm of load peaks from 2015 to 2024",
x = "Load in MWh"
)
Histogramm of the peaks grouped by working days
peaks |>
ggplot(aes(load)) +
geom_histogram(binwidth = 250) +
labs(
title = "Histogramm of Loads for 2015 - 2024 for Working Days",
x = "Load in MWh"
) +
facet_wrap(~working_day)
Histogramm of the peaks based on hours of the day
peaks |>
ggplot(aes(hour_int)) +
geom_histogram(binwidth = 1) +
labs(
title = "Histogramm of Peak Loads for 2015 - 2024 per hour of the day",
x = "Hour of the day"
)
Scatterplot of the load peaks per hour grouped by working days
peaks |>
ggplot(aes(x = hour_int, y = load, color = working_day)) +
geom_point() +
scale_x_continuous(breaks = seq(min(peaks$hour_int), max(peaks$hour_int), by = 1)) +
labs(
title = "Scatterplot of Load peaks from 2015 - 2024",
x = "Hour of the day",
y = "Load in MWh"
)
Density of the peaks
ggplot(peaks, aes(x = load)) +
geom_density() +
labs(
title = "Density of the load peaks 2015 - 2024",
x = "Load in MWh"
)
Density of the peaks grouped by working days
ggplot(peaks, aes(x = load)) +
geom_density() +
labs(
title = "Density of the load peaks 2015 - 2024 grouped by working days",
x = "Load in MWh"
) +
facet_wrap(~working_day)
Boxplot of the peaks per day for the weeks from 2022 - 2024
peaks |>
ggplot(aes(x = as.factor(weekday_int), y = load)) +
geom_boxplot() +
labs(x = "Weekday", y = "Load in MWh", title = "Daily peak per weekday from 2015 - 2024")
Boxplot of the peaks per month for periode from 2022 - 2024
peaks |>
ggplot(aes(x = as.factor(month_int), y = load)) +
geom_boxplot() +
labs(x = "Month", y = "Load in MWh", title = "Peak load per month from 2015 - 2024")
library(MASS)
library(plotly)
Density of energy load on hourly resolution
den3d <- kde2d(energy_load$load, energy_load$hour_int)
plot_ly(x = den3d$x, y = den3d$y, z = den3d$z) |> add_surface()
Density of load peaks per day for working days
peaks_working <- peaks |>
filter(working_day == TRUE)
peaks_working |>
ggplot(aes(load)) +
geom_density() +
labs(
title = "Density of the load peaks for the day grouped by hour for working days",
x = "Load in MWh"
) +
facet_wrap(~hour_int)
Depicting the density of hour 11
peaks_working |>
filter(hour_int == 11) |>
ggplot(aes(load)) +
geom_density() +
labs(
title = "Density of the load peaks on Working days at hour 11",
x = "Load in MWh"
)
Depicting the density of hour 12
peaks_working |>
filter(hour_int == 12) |>
ggplot(aes(load)) +
geom_density() +
labs(
title = "Density of the load peaks on Working days at hour 12",
x = "Load in MWh"
)
Depicting the density of hour 13
peaks_working |>
filter(hour_int == 13) |>
ggplot(aes(load)) +
geom_density() +
labs(
title = "Density of the load peaks on Working days at hour 13",
x = "Load in MWh"
)
Density of load peaks per day for weekends
peaks_weekends <- peaks |>
filter(working_day == FALSE)
peaks_weekends |>
ggplot(aes(load)) +
geom_density() +
labs(
title = "Density of the load peaks for the day grouped by hour for weekends",
x = "Load in MWh"
) +
facet_wrap(~hour_int)
## Warning: Groups with fewer than two data points have been dropped.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
Depicting the density of hour 18
peaks_weekends |>
filter(hour_int == 18) |>
ggplot(aes(load)) +
geom_density() +
labs(
title = "Density of the load peaks for the weekends at hour 18",
x = "Load in MWh"
)
Depicting the density of hour 19
peaks_weekends |>
filter(hour_int == 19) |>
ggplot(aes(load)) +
geom_density() +
labs(
title = "Density of the load peaks for the weekends at hour 19",
x = "Load in MWh"
)
Depicting the density of hour 20
peaks_weekends |>
filter(hour_int == 20) |>
ggplot(aes(load)) +
geom_density() +
labs(
title = "Density of the load peaks for the weekends at hour 20",
x = "Load in MWh"
)